Problem Statement¶
In [ ]:
from tensorflow import keras
from tensorflow.keras import layers
import pathlib
from tensorflow.keras.utils import image_dataset_from_directory
import pandas as pd
import pathlib
from pathlib import Path
import numpy as np
import pandas as pd
# plotting modules
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import plotly as plotly
plotly.offline.init_notebook_mode()
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
from keras.utils import to_categorical
from keras.models import load_model
import logging
import warnings
# Suppress TensorFlow warnings
tf.get_logger().setLevel(logging.ERROR)
# Suppress specific warnings
warnings.filterwarnings('ignore', category=UserWarning, module='tensorflow')
warnings.filterwarnings('ignore', category=FutureWarning, module='tensorflow')
import plotly.graph_objects as go
from tensorflow.keras.models import Sequential
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report, precision_recall_curve, ConfusionMatrixDisplay
In [ ]:
tf.config.list_physical_devices('GPU')
Out[ ]:
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
In [ ]:
data_folder = pathlib.Path("../../../../../Downloads/archive/plantnet_300K")
In [ ]:
train_path = data_folder / "images_train_over_500"
val_path = data_folder / "images_val_over_500"
test_path = data_folder / "images_test_over_500"
In [ ]:
plant_folders = [x for x in train_path.glob('*') if x.is_dir()]
plant_folders = [x.name for x in plant_folders]
plant_folders[:10]
Out[ ]:
['1355932', '1355936', '1355937', '1355978', '1355990', '1356022', '1356075', '1356111', '1356126', '1356257']
In [ ]:
len(plant_folders)
Out[ ]:
108
In [ ]:
# how many images are in each folder
plant_count = {}
for plant in plant_folders:
plant_count[plant] = len(list(train_path.glob(f'{plant}/*')))
plant_count = {k: v for k, v in sorted(plant_count.items(), key=lambda item: item[1], reverse=True)}
plant_count
Out[ ]:
{'1363227': 7208,
'1392475': 6337,
'1356022': 6140,
'1364099': 5334,
'1355937': 5178,
'1359517': 5063,
'1357330': 4837,
'1358752': 4502,
'1359620': 4285,
'1363128': 4005,
'1363991': 3862,
'1355936': 3419,
'1394460': 3388,
'1363740': 3353,
'1394994': 3183,
'1364173': 3031,
'1359616': 2811,
'1364164': 2788,
'1361824': 2739,
'1361823': 2738,
'1397364': 2700,
'1358095': 2468,
'1363130': 2448,
'1389510': 2385,
'1374048': 2330,
'1367432': 2245,
'1409238': 2241,
'1397268': 2200,
'1393614': 2101,
'1356781': 2007,
'1369887': 1952,
'1393241': 1941,
'1394420': 1899,
'1398178': 1779,
'1408774': 1776,
'1435714': 1762,
'1394591': 1757,
'1385937': 1730,
'1355932': 1716,
'1358094': 1700,
'1393425': 1685,
'1393423': 1671,
'1398592': 1597,
'1408961': 1578,
'1358133': 1570,
'1358766': 1534,
'1361656': 1503,
'1384485': 1440,
'1356257': 1379,
'1358689': 1359,
'1394382': 1348,
'1359498': 1319,
'1362490': 1303,
'1357635': 1291,
'1355990': 1224,
'1363336': 1181,
'1396824': 1118,
'1400100': 1070,
'1418146': 1056,
'1356075': 1040,
'1356382': 1031,
'1360978': 1030,
'1363764': 1028,
'1394454': 1022,
'1364159': 1007,
'1393393': 968,
'1362294': 934,
'1369960': 923,
'1409295': 923,
'1359669': 903,
'1355978': 891,
'1391483': 889,
'1394404': 873,
'1398515': 835,
'1356111': 823,
'1360671': 794,
'1391192': 784,
'1390637': 748,
'1359625': 744,
'1364172': 742,
'1360998': 740,
'1391652': 732,
'1360588': 730,
'1358605': 728,
'1359488': 723,
'1361759': 710,
'1356126': 704,
'1391226': 681,
'1360153': 677,
'1398128': 663,
'1358751': 661,
'1360590': 661,
'1359485': 648,
'1394489': 646,
'1393792': 630,
'1363737': 622,
'1358105': 620,
'1421021': 608,
'1357677': 571,
'1363749': 566,
'1356421': 550,
'1363490': 543,
'1420863': 536,
'1363699': 528,
'1358150': 520,
'1397420': 511,
'1418547': 504,
'1392695': 502}
In [ ]:
# how many folders have more than 500 images
len([k for k, v in plant_count.items() if v > 500])
Out[ ]:
108
In [ ]:
fig, axes = plt.subplots(10, 5, figsize=(15, 15))
axes = axes.ravel()
for i in range(50):
plant = plant_folders[i // 5]
img_files = list(train_path.glob(f'{plant}/*'))
if len(img_files) > i % 5:
img_path = img_files[i % 5]
img = plt.imread(img_path)
axes[i].imshow(img)
axes[i].axis('off')
axes[i].set_title(plant)
else:
axes[i].axis('off')
axes[i].set_title(f'{plant} (No Image)')
plt.tight_layout()
plt.show()
In [ ]:
image_size = (180, 180)
batch_size = 32
train_dataset = tf.keras.utils.image_dataset_from_directory(
train_path,
# shuffle=False,
seed=1337,
image_size=image_size,
batch_size=batch_size,
)
validation_dataset = tf.keras.utils.image_dataset_from_directory(
val_path,
# shuffle=False,
seed=1337,
image_size=image_size,
batch_size=batch_size,
)
test_dataset = tf.keras.utils.image_dataset_from_directory(
test_path,
# shuffle=False,
seed=1337,
image_size=image_size,
batch_size=batch_size,
)
Found 188011 files belonging to 108 classes. Found 23571 files belonging to 108 classes. Found 23565 files belonging to 108 classes.
In [ ]:
class_names = test_dataset.class_names
num_classes = len(class_names)
In [ ]:
#normalization
normalization_layer = layers.Rescaling(1./255)
In [ ]:
from tensorflow.keras.applications import DenseNet169
from tensorflow.keras import layers, models
base_model = DenseNet169(weights='imagenet', include_top=False, input_shape=(180, 180, 3))
In [ ]:
data_augmentation = tf.keras.Sequential([
layers.experimental.preprocessing.RandomFlip("horizontal"),
layers.experimental.preprocessing.RandomRotation(0.1),
layers.experimental.preprocessing.RandomZoom(0.1),
layers.experimental.preprocessing.RandomContrast(0.1)
])
In [ ]:
model = tf.keras.Sequential([
# Data augmentation layer
layers.Input(shape=(180, 180, 3)),
normalization_layer,
data_augmentation,
# Base model (DenseNet169)
base_model,
# Additional layers
layers.BatchNormalization(),
layers.GlobalAveragePooling2D(),
layers.Dense(1024, activation='relu'),
layers.Dropout(0.5), # Regularization
layers.Dense(num_classes, activation='softmax')
])
In [ ]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer,
loss=tf.keras.losses.SparseCategoricalCrossentropy(),
metrics=['accuracy'])
optimizer.lr.assign(0.0001)
Out[ ]:
<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=1e-04>
In [ ]:
model.summary()
Model: "sequential_7"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
rescaling_3 (Rescaling) (None, 180, 180, 3) 0
sequential_6 (Sequential) (None, 180, 180, 3) 0
densenet169 (Functional) (None, 5, 5, 1664) 12642880
batch_normalization_3 (Batc (None, 5, 5, 1664) 6656
hNormalization)
global_average_pooling2d_3 (None, 1664) 0
(GlobalAveragePooling2D)
dense_6 (Dense) (None, 1024) 1704960
dropout_3 (Dropout) (None, 1024) 0
dense_7 (Dense) (None, 108) 110700
=================================================================
Total params: 14,465,196
Trainable params: 14,303,468
Non-trainable params: 161,728
_________________________________________________________________
In [ ]:
callbacks = [
keras.callbacks.ModelCheckpoint(
filepath="./models/densenet_with_augmentation_newer.keras",
save_best_only=True,
monitor="val_loss")
]
In [ ]:
epochs=10
history = model.fit(
train_dataset,
validation_data=validation_dataset,
epochs=epochs,
callbacks=callbacks
)
Epoch 1/10 5876/5876 [==============================] - 2673s 453ms/step - loss: 1.1688 - accuracy: 0.6882 - val_loss: 0.8507 - val_accuracy: 0.7524 Epoch 2/10 5876/5876 [==============================] - 2701s 460ms/step - loss: 0.6733 - accuracy: 0.7998 - val_loss: 0.7690 - val_accuracy: 0.7746 Epoch 3/10 5876/5876 [==============================] - 2697s 459ms/step - loss: 0.5577 - accuracy: 0.8295 - val_loss: 0.6638 - val_accuracy: 0.8047 Epoch 4/10 5876/5876 [==============================] - 2694s 458ms/step - loss: 0.4871 - accuracy: 0.8469 - val_loss: 0.7248 - val_accuracy: 0.7976 Epoch 5/10 5876/5876 [==============================] - 2698s 459ms/step - loss: 0.4392 - accuracy: 0.8598 - val_loss: 0.6050 - val_accuracy: 0.8272 Epoch 6/10 5876/5876 [==============================] - 2698s 459ms/step - loss: 0.4024 - accuracy: 0.8705 - val_loss: 0.6410 - val_accuracy: 0.8174 Epoch 7/10 5876/5876 [==============================] - 2695s 459ms/step - loss: 0.3709 - accuracy: 0.8782 - val_loss: 0.7701 - val_accuracy: 0.7919 Epoch 8/10 5876/5876 [==============================] - 2693s 458ms/step - loss: 0.3453 - accuracy: 0.8855 - val_loss: 0.6142 - val_accuracy: 0.8261 Epoch 9/10 5876/5876 [==============================] - 2684s 457ms/step - loss: 0.3263 - accuracy: 0.8910 - val_loss: 0.6070 - val_accuracy: 0.8297 Epoch 10/10 5876/5876 [==============================] - 2673s 455ms/step - loss: 0.3067 - accuracy: 0.8959 - val_loss: 0.6154 - val_accuracy: 0.8283
In [ ]:
test_loss, test_acc = model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.3f}")
737/737 [==============================] - 50s 68ms/step - loss: 0.5948 - accuracy: 0.8283 Test accuracy: 0.828
In [ ]:
best_model = load_model("./models/densenet_with_augmentation_newer.keras")
best_model.evaluate(test_dataset)
737/737 [==============================] - 50s 65ms/step - loss: 0.5832 - accuracy: 0.8328
Out[ ]:
[0.5832472443580627, 0.8328453302383423]
Test dataset accuracy of 83.28%.